from datetime import datetime

import pandas as pd

# Output column layout for the "normal" scene: check_length() returns exactly
# these columns, in this order, and creates any column missing from the input
# frame as an empty-string column.
normal_cols = ['id', 'node', 'segmentCnt', 'tcpflags.rst', 'tcpflags.ack', 'tcpflags.syn', 'tcpflags.urg',
               'tcpflags.psh', 'tcpflags.syn-ack', 'tcpflags.fin', 'source.ip', 'destination.ip', 'source.port',
               'source.packets', 'source.bytes', 'destination.port', 'destination.bytes', 'destination.packets',
               'initRTT', 'firstPacket', 'lastPacket', 'ipProtocol', 'protocolCnt', 'protocol', 'server.bytes',
               'totDataBytes', 'network.packets', 'network.bytes', 'length', 'client.bytes', 'http.uri',
               'http.uri_length_mean', 'http.uri_length_var', 'http.uri_param_count_mean', 'http.uri_param_count_var',
               'http.uri_depth_mean', 'http.uri_depth_var', 'http.uri_filename_length_mean',
               'http.uri_filename_length_var', 'http.response-content-type', 'http.bodyMagicCnt', 'http.statuscodeCnt',
               'http.clientVersionCnt', 'http.response-content-typeCnt', 'http.xffIpCnt', 'http.requestHeaderCnt',
               'http.serverVersion', 'http.serverVersionCnt', 'http.responseHeaderCnt', 'http.xffIp',
               'http.clientVersion', 'http.uriTokens', 'http.useragentCnt', 'http.statuscode', 'http.bodyMagic',
               'http.request-content-type', 'http.uriCnt', 'http.useragent', 'http.keyCnt', 'http.request-referer',
               'http.request-refererCnt', 'http.path', 'http.hostCnt', 'http.host', 'http.response-server',
               'http.pathCnt', 'http.useragentTokens', 'http.methodCnt', 'http.method', 'http.method-GET',
               'http.method-POST', 'http.key', 'http.hostTokens', 'http.requestHeader', 'http.responseHeader',
               'dns.ASN', 'dns.RIR', 'dns.GEO', 'dns.alpn', 'dns.alpnCnt', 'dns.ip', 'dns.ipCnt', 'dns.OpCode',
               'dns.OpCodeCnt', 'dns.Puny', 'dns.PunyCnt', 'dns.QueryClass', 'dns.QueryClassCnt', 'dns.QueryType',
               'dns.QueryTypeCnt', 'dns.status', 'dns.hostCnt', 'dns.host', 'dns.statusCnt', 'tls.cipher',
               'tls.cipherCnt', 'tls.dstSessionId', 'tls.ja3', 'tls.ja3Cnt', 'tls.ja3s', 'tls.ja3sCnt', 'tls.ja4',
               'tls.ja4Cnt', 'tls.srcSessionId', 'tls.version', 'tls.versionCnt', 'tls.ja4_r', 'tls.ja4_rCnt',
               'packetPos', 'traffic_type', 'PROTOCOL', 'DENY_METHOD', 'THREAT_SUMMARY', 'SEVERITY',
               'source.ip_Country_IsoCode', 'source.ip_Country_Name', 'source.ip_Country_SpecificName',
               'source.ip_Country_SpecificIsoCode', 'source.ip_City_Name', 'source.ip_City_PostalCode',
               'source.ip_Location_Latitude', 'source.ip_Location_Longitude', 'destination.ip_Country_IsoCode',
               'destination.ip_Country_Name', 'destination.ip_Country_SpecificName',
               'destination.ip_Country_SpecificIsoCode', 'destination.ip_City_Name', 'destination.ip_City_PostalCode',
               'destination.ip_Location_Latitude', 'destination.ip_Location_Longitude',
               'URI_FEATURES_EXTRA_param_count', 'URI_FEATURES_EXTRA_path_depth', 'URI_FEATURES_EXTRA_param_length_avg',
               'URI_FEATURES_EXTRA_param_length_max', 'URI_FEATURES_EXTRA_contains_sql',
               'URI_FEATURES_EXTRA_contains_xss', 'URI_FEATURES_EXTRA_contains_cmd', 'URI_FEATURES_EXTRA_contains_path',
               'URI_FEATURES_EXTRA_contains_redirect', 'URI_FEATURES_EXTRA_contains_danger',
               'URI_FEATURES_EXTRA_contains_suspicious_ext', 'UserAgent_is_attack', 'UserAgent_is_enterprise',
               'UserAgent_browser', 'UserAgent_browser_version', 'UserAgent_os', 'UserAgent_os_version',
               'UserAgent_device_type', 'UserAgent_platform', 'UserAgent_is_bot', 'UserAgent_language',
               'UserAgent_special_char_count', 'UserAgent_is_unknown', 'dns_host_is_long_domain',
               'dns_host_is_random_characters', 'dns_host_is_special_characters', 'dns_host_is_large_subdomains',
               'dns_host_is_danger_domain', 'dns_host_is_danger_subdomain', 'dns_host_is_uncommon_tld', 'src_host',
               'src_user_agent', 'src_content_length', 'src_connection', 'src_content_type', 'src_x_forwarded_for',
               'src_accept', 'src_accept_encoding', 'src_accept_language', 'src_referer', 'src_cookie', 'src_origin',
               'src_x_requested_with', 'src_sec_fetch_mode', 'src_sec_fetch_site', 'src_sec_fetch_dest',
               'src_content_encoding', 'src_sec_ch_ua', 'src_sec_ch_ua_mobile', 'src_sec_ch_ua_platform',
               'src_cache_control', 'src_upgrade_insecure_requests', 'src_pragma', 'src_x_real_ip', 'src_soapaction',
               'src_x_forwarded_proto', 'src_x_forwarded_host', 'src_x_request_id', 'src_x_forwarded_port',
               'src_x_forwarded_scheme', 'src_x_original_forwarded_for', 'src_x_scheme', 'src_x_forwarded_scheme_diy',
               'src_authorization', 'src_trace_id', 'src_distinct_id', 'src_url_path', 'src_if_modified_since',
               'src_ecid_context', 'src_token', 'src_access_control_request_method', 'src_access_token', 'src_expect',
               'src_access_control_request_headers', 'src_sec_fetch_user', 'src_snk_location', 'src_accept_charset',
               'src_date', 'src_usercode', 'src_logincode', 'src_range', 'src_checktime', 'src_client_lang',
               'src_requestsource', 'src_if_range', 'src_if_none_match', 'src_sw8', 'src_sw8_correlation', 'src_sw8_x',
               'src_x_sign', 'src_x_timestamp', 'src_charset', 'src_x_custom_header', 'src_sec_websocket_key',
               'src_sec_websocket_version', 'src_upgrade', 'src_sec_websocket_extensions', 'src_xms_auth_token',
               'src_xms_cluster_id', 'src_purpose', 'src_x_ti_app_id', 'src_x_ti_secret_code',
               'src_x_prototype_version', 'src_systemcode', 'src_access_control_request_private_network', 'src_wxr',
               'src_client_version', 'src_requestid', 'src_request_module', 'src_x_mule_encoding', 'src_x_mule_session',
               'src_elite_tag', 'src_dnt', 'src_x_mule_endpoint', 'src_x_mule_root_message_id', 'src_x_mass_tappid',
               'src_x_elastic_product_origin', 'src_priority', 'src_server', 'src_transfer_encoding',
               'src_xxl_job_access_token', 'src_te', 'src_access_control_allow_origin', 'src_eset_spread_control',
               'src_x_splunk_digest', 'src_x_splunk_lm_nonce', 'src_x_splunk_lm_signature', 'src_x_splunk_lm_timestamp',
               'src_avb_version', 'src_set_cookie', 'src_x_oracle_dms_ecid', 'src_x_oracle_dms_rid',
               'src_x_prometheus_scrape_timeout_seconds', 'src_client', 'src_post', 'src_ucosessionid', 'src_user',
               'src_winuser', 'src_showloading', 'src_digest', 'src_ocrimageflag', 'src_x_content_type_options',
               'src_access_control_allow_methods', 'src_access_control_allow_credentials', 'src_expires',
               'src_keep_alive', 'src_url_token', 'src_postman_token', 'src_x_xss_protection', 'src_vary',
               'src_access_control_allow_headers', 'src_signdata', 'src_ua_cpu', 'src_authorg', 'src_yssip',
               'src_yssmac', 'src_location', 'src_content_language', 'src_git_protocol', 'src_x_forwarded_prefix',
               'src_invoke_type', 'src_x_ua_compatible', 'src_x_bd_traceid', 'src_x_from_h3_trnet', 'src_n',
               'src_nx_anti_csrf_token', 'src_proxy_connection', 'src_via', 'src_x_nexus_ui', 'src_pragma_type',
               'src_appkey', 'src_area', 'src_check', 'src_clientname', 'src_lang', 'src_networktype', 'src_nonce',
               'src_osversion', 'src_screen', 'src_st', 'src_timestamp', 'src_userid', 'src_x_frame_options',
               'src_x_b3_parentspanid', 'src_x_b3_sampled', 'src_x_b3_spanid', 'src_x_b3_traceid', 'src_sec_purpose',
               'src_kbn_version', 'src_access_control_expose_headers', 'src_iszip', 'src_from', 'src_state',
               'src_access_control_max_age', 'src_allow', 'src_kbn_system_request', 'src_tracelogcontext', 'src_etag',
               'src_ms_cv', 'src_x_application_context', 'src_x_csrftoken', 'src_tlogtraceid', 'src_link_pwd_token',
               'src_preip', 'src_preivkapp', 'src_preivkhost', 'src_tlogspanid', 'src_accesstoken', 'src_icy_metadata',
               'src_vivo_browser_text_zoom', 'src_contents_client_adapter_id', 'src_last_modified',
               'src_need_select_sub_url', 'src_x_bd_quic', 'src_x_bdboxapp_netengine', 'src_x_playback_session_id',
               'src_x_turbonet_info', 'src_cas_tgc', 'src_imgids', 'src_jsessionid', 'src_accept_ranges',
               'src_am_traceid', 'src_xms_backend_server', 'src_ss_language', 'src_sssessionid', 'src_username',
               'src_content_md5', 'src_x_acs_security_token', 'src_x_log_apiversion', 'src_x_log_bodyrawsize',
               'src_x_log_compresstype', 'src_x_log_signaturemethod', 'src_x_nod32_mode', 'src_client_appname',
               'src_client_requesttoken', 'src_client_requestts', 'src_exconfiginfo', 'src_long_pulling_timeout',
               'src_dispatch_header', 'src_sign', 'src_strict_transport_security', 'src_ts', 'src_bdpparallelload',
               'src_referrer_policy', 'src_x_amz_request_id', 'src_x_cm_service', 'src_x_miorigin',
               'src_x_seafile_client_version', 'src_amz_sdk_invocation_id', 'src_amz_sdk_request', 'src_amz_sdk_retry',
               'src_bussno', 'src_busstype', 'src_classcode', 'src_comcode', 'src_createtime', 'src_createuser',
               'src_imgfilepath', 'src_imgid', 'src_imgtype', 'src_s_cnection', 'src_storagecondition',
               'src_x_eset_updateid', 'src_x_powered_by', 'src_allow_cross_domain_redirect', 'src_amp_cache_transform',
               'src_x_csrf_token', 'src_x_check_exist', 'src_sec_gpc', 'src_sf_ajax', 'src_request_type',
               'src_x_envoy_decorator_operation', 'src_x_envoy_upstream_service_time', 'src_x_from_cdn',
               'src_x_download_options', 'src_x_permitted_cross_domain_policies', 'src_busscode', 'src_companyno',
               'src_fcpos', 'src_forwarded', 'src_prod_sw8', 'src_prod_sw8_correlation', 'src_prod_sw8_x',
               'src_rewritepath', 'src_x_elastic_product', 'src_x_forwarded_server', 'src_in_form_img', 'src_x_pjax',
               'src_age', 'src_channel', 'src_client_ip', 'src_content_transfer_encoding', 'src_guid', 'src_oldchannel',
               'src_product', 'src_remote_addr', 'src_seafile_repo_token', 'src_starttag', 'src_starttype',
               'src_traceid', 'src_warded_for', 'src_x_aspnet_version', 'src_zcid', 'src_agent_version', 'src_alt_svc',
               'src_cf_ray', 'src_chrome_proxy', 'src_content_disposition', 'src_content_range',
               'src_content_security_policy', 'src_d_for', 'src_grpc_accept_encoding', 'src_grpc_timeout',
               'src_gslb_okhttp', 'src_jenkins_crumb', 'src_nel', 'src_orderid', 'src_p3p', 'src_report_to',
               'src_route_data', 'src_service_worker', 'src_tap_app_conf_ver', 'src_tap_gslb', 'src_tc_anp',
               'src_tc_entsig', 'src_tc_spanid', 'src_tc_traceid', 'src_www_authenticate', 'src_x_aggregate_auth',
               'src_x_amz_storage_class', 'src_x_cache_status', 'src_x_ccc', 'src_x_cdn_request_id', 'src_x_cid',
               'src_x_clickhouse_format', 'src_x_clickhouse_query_id', 'src_x_clickhouse_server_display_name',
               'src_x_clickhouse_summary', 'src_x_clickhouse_timezone', 'src_x_gitlab_feature_category',
               'src_x_link_via', 'src_x_ucbrowser_ua', 'dst_date', 'dst_content_type', 'dst_content_length',
               'dst_transfer_encoding', 'dst_connection', 'dst_set_cookie', 'dst_x_oracle_dms_ecid',
               'dst_x_oracle_dms_rid', 'dst_content_language', 'dst_cache_control', 'dst_server', 'dst_expires',
               'dst_pragma', 'dst_access_control_allow_origin', 'dst_keep_alive', 'dst_vary', 'dst_last_modified',
               'dst_access_control_allow_credentials', 'dst_access_control_allow_methods', 'dst_accept_ranges',
               'dst_access_control_allow_headers', 'dst_x_content_type_options', 'dst_etag',
               'dst_access_control_max_age', 'dst_content_encoding', 'dst_access_control_expose_headers',
               'dst_x_xss_protection', 'dst_access_control_request_headers', 'dst_x_frame_options', 'dst_location',
               'dst_x_powered_by', 'dst_x_application_context', 'dst_p3p', 'dst_x_ua_compatible', 'dst_x_mule_encoding',
               'dst_x_mule_session', 'dst_s_cnection', 'dst_x_amz_request_id', 'dst_content_disposition',
               'dst_x_amz_storage_class', 'dst_content_range', 'dst_pragma_type', 'dst_allow', 'dst_xms_backend_server',
               'dst_x_clickhouse_server_display_name', 'dst_x_clickhouse_summary', 'dst_x_clickhouse_format',
               'dst_x_clickhouse_query_id', 'dst_x_clickhouse_timezone', 'dst_referrer_policy',
               'dst_content_security_policy', 'dst_content_id', 'dst_sec_websocket_accept', 'dst_upgrade',
               'dst_tlogtraceid', 'dst_sec_websocket_extensions', 'dst_x_elastic_product', 'dst_via', 'dst_x_cache',
               'dst_x_amz_cf_id', 'dst_x_amz_cf_pop', 'dst_rgwx_embedded_metadata_len', 'dst_rgwx_mtime',
               'dst_rgwx_obj_pg_ver', 'dst_rgwx_object_size', 'dst_rgwx_source_zone_short_id', 'dst_x_amz_version_id',
               'dst_x_request_id', 'dst_x_envoy_decorator_operation', 'dst_x_envoy_upstream_service_time', 'dst_age',
               'dst_www_authenticate', 'dst_strict_transport_security', 'dst_terminationurl', 'dst_x_splunk_digest',
               'dst_x_splunk_lm_nonce', 'dst_x_splunk_lm_timestamp', 'dst_x_download_options',
               'dst_x_permitted_cross_domain_policies', 'dst_pragrma', 'dst_content_transfer_encoding', 'dst_x_runtime',
               'dst_x_arequestid', 'dst_x_ausername', 'dst_x_asessionid', 'dst_x_aspnet_version',
               'dst_x_seraph_loginreason', 'dst_accept_charset', 'dst_n', 'dst_error_code', 'dst_error_msg',
               'dst_x_w_no', 'dst_kbn_license_sig', 'dst_kbn_name', 'dst_cross_origin_opener_policy',
               'dst_xdomainrequestallowed', 'dst_x_ratelimit_limit_vass_zuul_api_user_24',
               'dst_x_ratelimit_remaining_vass_zuul_api_use', 'dst_x_ratelimit_remaining_vass_zuul_wx_port',
               'dst_x_ratelimit_reset_vass_zuul_api_user_24', 'dst_audit_id', 'dst_x_kubernetes_pf_flowschema_uid',
               'dst_x_kubernetes_pf_prioritylevel_uid', 'dst_alt_svc', 'dst_cf_ray', 'dst_nel', 'dst_report_to',
               'dst_x_ratelimit_remaining_vass_zuul_api_ord', 'dst_x_protected_by', 'dst_traceresponse',
               'dst_x_br_response', 'dst_x_cache_status', 'dst_x_ratelimit_limit_vass_zuul_api_order_1',
               'dst_x_ratelimit_reset_vass_zuul_api_order_1', 'dst_grpc_metadata_accept_encoding',
               'dst_grpc_metadata_content_type', 'dst_grpc_metadata_grpc_accept_encoding',
               'dst_x_ratelimit_limit_vass_zuul_wx_port_240', 'dst_x_ratelimit_reset_vass_zuul_wx_port_240',
               'dst_permissions_policy', 'dst_ctl_cache_status', 'dst_progma', 'dst_request_id', 'dst_x_csrf_token',
               'dst_x_log_append_meta', 'dst_x_log_requestid', 'dst_x_log_time', 'dst_hostname',
               'dst_x_networkmanager_status', 'dst_browseruid', 'dst_k_cache_status', 'dst_kcs_via', 'dst_x_ccc',
               'dst_x_cid', 'dst_page_title', 'dst_x_via', 'dst_x_ws_request_id', 'dst_enable_encrypted_library',
               'dst_expire', 'dst_x_ratelimit_limit_vass_zuul_wx_port_39.',
               'dst_x_ratelimit_reset_vass_zuul_wx_port_39.', 'dst_content_location', 'dst_gsid', 'dst_sc',
               'dst_x_response_timestrap', 'dst_link', 'dst_x_ratelimit_limit_vass_zuul_api_order_2',
               'dst_x_ratelimit_limit_vass_zuul_wx_port_10.', 'dst_x_ratelimit_reset_vass_zuul_api_order_2',
               'dst_x_ratelimit_reset_vass_zuul_wx_port_10.', 'dst_x_reqid', 'dst_login', 'dst_x_cache_lookup',
               'dst_x_cdn_request_id', 'dst_x_link_via', 'dst_x_nws_log_uuid',
               'dst_x_ratelimit_limit_vass_zuul_wx_port_111', 'dst_x_ratelimit_reset_vass_zuul_wx_port_111',
               'dst_cache_contror', 'dst_cdn_cache', 'dst_cdn_cachedat', 'dst_cdn_edgestorageid', 'dst_cdn_proxyver',
               'dst_cdn_pullzone', 'dst_cdn_requestcountrycode', 'dst_cdn_requestid', 'dst_cdn_requestpullcode',
               'dst_cdn_requestpullsuccess', 'dst_cdn_status', 'dst_cdn_uid', 'dst_gitlab_ci_builds_polling',
               'dst_new_jwt', 'dst_ohc_cache_hit', 'dst_ohc_file_size', 'dst_ohc_global_saved_time', 'dst_x_hudson',
               'dst_x_instance_identity', 'dst_x_jenkins', 'dst_x_jenkins_session', 'dst_dir_perm', 'dst_fndfs_error',
               'dst_oid', 'dst_x_errno', 'dst_x_hudson_theme', 'dst_x_oss_hash_crc64ecma', 'dst_x_ser',
               'dst_content_md5', 'dst_exception', 'dst_exceptiontype', 'dst_praga', 'dst_x_oss_object_type',
               'dst_x_oss_request_id', 'dst_x_oss_server_time', 'dst_x_oss_storage_class',
               'dst_x_ratelimit_limit_vass_zuul_api_order_3', 'dst_x_ratelimit_reset_vass_zuul_api_order_3',
               'packet_size_mean', 'same_src_dst_size_mean', 'same_src_dst_size_var', 'packet_size_variance',
               'packet_len_total_count', 'packet_len_total_average', 'packet_len_total_min', 'packet_len_total_max',
               'packet_len_total_rate', 'packet_len_total_percent', 'packet_len_0_19_count', 'packet_len_0_19_average',
               'packet_len_0_19_min', 'packet_len_0_19_max', 'packet_len_0_19_rate', 'packet_len_0_19_percent',
               'packet_len_20_39_count', 'packet_len_20_39_average', 'packet_len_20_39_min', 'packet_len_20_39_max',
               'packet_len_20_39_rate', 'packet_len_20_39_percent', 'packet_len_40_79_count',
               'packet_len_40_79_average', 'packet_len_40_79_min', 'packet_len_40_79_max', 'packet_len_40_79_rate',
               'packet_len_40_79_percent', 'packet_len_80_159_count', 'packet_len_80_159_average',
               'packet_len_80_159_min', 'packet_len_80_159_max', 'packet_len_80_159_rate', 'packet_len_80_159_percent',
               'packet_len_160_319_count', 'packet_len_160_319_average', 'packet_len_160_319_min',
               'packet_len_160_319_max', 'packet_len_160_319_rate', 'packet_len_160_319_percent',
               'packet_len_320_639_count', 'packet_len_320_639_average', 'packet_len_320_639_min',
               'packet_len_320_639_max', 'packet_len_320_639_rate', 'packet_len_320_639_percent',
               'packet_len_640_1279_count', 'packet_len_640_1279_average', 'packet_len_640_1279_min',
               'packet_len_640_1279_max', 'packet_len_640_1279_rate', 'packet_len_640_1279_percent',
               'packet_len_1280_2559_count', 'packet_len_1280_2559_average', 'packet_len_1280_2559_min',
               'packet_len_1280_2559_max', 'packet_len_1280_2559_rate', 'packet_len_1280_2559_percent',
               'packet_len_2560_5119_count', 'packet_len_2560_5119_average', 'packet_len_2560_5119_min',
               'packet_len_2560_5119_max', 'packet_len_2560_5119_rate', 'packet_len_2560_5119_percent',
               'packet_len_more_than_5120_count', 'packet_len_more_than_5120_average', 'packet_len_more_than_5120_min',
               'packet_len_more_than_5120_max', 'packet_len_more_than_5120_rate', 'packet_len_more_than_5120_percent',
               'all_req_packet_size_mean', 'all_req_packet_size_var', 'all_res_packet_size_mean',
               'all_res_packet_size_var', 'all_req_packet_time_period_mean', 'all_res_packet_time_period_mean',
               'all_req_packet_time_period_var', 'all_res_packet_time_period_var', 'req_header_count_mean',
               'req_header_count_var', 'dns_base_domain', 'dns_base_domain_length', 'dns_domain_suffix',
               'dns_domain_suffix_length', 'dns_domain', 'dns_domain_length', 'dns_domain_length_mean',
               'dns_domain_length_var', 'req_res_period_mean', 'req_res_period_var', 'status_code_1x_count',
               'status_code_2x_count', 'status_code_3x_count', 'status_code_4x_count', 'status_code_5x_count',
               'req_bytes_percentage', 'res_bytes_percentage', 'cookie_end_with_semicolon_count', 'ua_duplicate_count',
               'origin_isDangerous', 'isDangerous']
# Output column layout for the "webshell" scene: check_length() returns exactly
# these columns, in this order, and creates any column missing from the input
# frame as an empty-string column.
webshell_cols = ['segmentCnt', 'tcpflags.rst', 'tcpflags.ack', 'tcpflags.syn', 'tcpflags.psh', 'tcpflags.syn-ack',
                 'tcpflags.fin', 'source.packets', 'source.bytes', 'destination.port', 'destination.bytes',
                 'destination.packets', 'ipProtocol', 'protocolCnt', 'protocol', 'server.bytes', 'totDataBytes',
                 'network.packets', 'network.bytes', 'client.bytes', 'http.uri_length_mean', 'http.uri_length_var',
                 'http.uri_param_count_mean', 'http.uri_param_count_var', 'http.uri_depth_mean', 'http.uri_depth_var',
                 'http.uri_filename_length_mean', 'http.uri_filename_length_var', 'http.response-content-type',
                 'http.bodyMagicCnt', 'http.clientVersionCnt', 'http.response-content-typeCnt', 'http.xffIpCnt',
                 'http.requestHeaderCnt', 'http.serverVersion', 'http.serverVersionCnt', 'http.responseHeaderCnt',
                 'http.clientVersion', 'http.useragentCnt', 'http.statuscode', 'http.bodyMagic',
                 'http.request-content-type', 'http.uriCnt', 'http.keyCnt', 'http.hostCnt', 'http.response-server',
                 'http.pathCnt', 'http.methodCnt', 'http.method', 'http.method-GET', 'http.method-POST', 'http.key',
                 'http.hostTokens', 'dns.ASN', 'dns.RIR', 'dns.GEO', 'dns.alpn', 'dns.alpnCnt', 'dns.ip', 'dns.ipCnt',
                 'dns.OpCode', 'dns.OpCodeCnt', 'dns.Puny', 'dns.QueryClass', 'dns.QueryClassCnt', 'dns.QueryType',
                 'dns.QueryTypeCnt', 'dns.status', 'dns.host', 'dns.statusCnt', 'tls.cipher', 'tls.cipherCnt',
                 'tls.dstSessionId', 'tls.ja3', 'tls.ja3Cnt', 'tls.ja3s', 'tls.ja3sCnt', 'tls.ja4', 'tls.ja4Cnt',
                 'tls.srcSessionId', 'tls.version', 'tls.versionCnt', 'tls.ja4_r', 'tls.ja4_rCnt',
                 'URI_FEATURES_EXTRA_param_count', 'URI_FEATURES_EXTRA_path_depth',
                 'URI_FEATURES_EXTRA_param_length_avg', 'URI_FEATURES_EXTRA_param_length_max',
                 'URI_FEATURES_EXTRA_contains_sql', 'URI_FEATURES_EXTRA_contains_xss',
                 'URI_FEATURES_EXTRA_contains_cmd', 'URI_FEATURES_EXTRA_contains_path',
                 'URI_FEATURES_EXTRA_contains_redirect', 'URI_FEATURES_EXTRA_contains_danger',
                 'URI_FEATURES_EXTRA_contains_suspicious_ext', 'UserAgent_is_attack', 'UserAgent_is_enterprise',
                 'UserAgent_browser', 'UserAgent_os', 'UserAgent_device_type', 'UserAgent_platform', 'UserAgent_is_bot',
                 'UserAgent_language', 'UserAgent_special_char_count', 'UserAgent_is_unknown',
                 'dns_host_is_long_domain', 'dns_host_is_random_characters', 'dns_host_is_special_characters',
                 'dns_host_is_large_subdomains', 'dns_host_is_danger_domain', 'dns_host_is_danger_subdomain',
                 'dns_host_is_uncommon_tld', 'src_host', 'src_content_length', 'src_connection', 'src_content_type',
                 'src_x_forwarded_for', 'src_accept', 'src_accept_encoding', 'src_accept_language', 'src_referer',
                 'src_origin', 'src_x_requested_with', 'src_sec_fetch_mode', 'src_sec_fetch_site', 'src_sec_fetch_dest',
                 'src_content_encoding', 'src_sec_ch_ua', 'src_sec_ch_ua_mobile', 'src_sec_ch_ua_platform',
                 'src_cache_control', 'src_upgrade_insecure_requests', 'src_pragma', 'src_x_real_ip', 'src_soapaction',
                 'src_x_forwarded_proto', 'src_x_forwarded_host', 'src_x_request_id', 'src_x_forwarded_port',
                 'src_x_forwarded_scheme', 'src_x_original_forwarded_for', 'src_authorization', 'src_trace_id',
                 'src_distinct_id', 'src_url_path', 'src_if_modified_since', 'src_ecid_context', 'src_token',
                 'src_access_control_request_method', 'src_access_token', 'src_access_control_request_headers',
                 'src_sec_fetch_user', 'src_accept_charset', 'src_usercode', 'src_logincode', 'src_range',
                 'src_checktime', 'src_requestsource', 'src_if_range', 'src_if_none_match', 'src_sw8', 'src_x_sign',
                 'src_x_timestamp', 'src_charset', 'src_sec_websocket_key', 'src_sec_websocket_extensions',
                 'src_xms_auth_token', 'src_x_prototype_version', 'src_systemcode', 'src_client_version',
                 'src_requestid', 'src_x_mule_encoding', 'src_x_mule_session', 'src_x_mule_root_message_id',
                 'src_priority', 'src_server', 'src_transfer_encoding', 'src_xxl_job_access_token', 'src_te',
                 'src_access_control_allow_origin', 'src_eset_spread_control', 'src_x_splunk_digest',
                 'src_x_splunk_lm_nonce', 'src_x_splunk_lm_signature', 'src_x_splunk_lm_timestamp', 'src_set_cookie',
                 'src_x_oracle_dms_ecid', 'src_client', 'src_post', 'src_ucosessionid', 'src_user', 'src_winuser',
                 'src_digest', 'src_access_control_allow_methods', 'src_expires', 'src_keep_alive', 'src_postman_token',
                 'src_x_xss_protection', 'src_vary', 'src_access_control_allow_headers', 'src_signdata', 'src_yssip',
                 'src_location', 'src_content_language', 'src_x_forwarded_prefix', 'src_x_ua_compatible',
                 'src_x_bd_traceid', 'src_n', 'src_nx_anti_csrf_token', 'src_proxy_connection', 'src_via', 'src_area',
                 'src_check', 'src_clientname', 'src_networktype', 'src_nonce', 'src_osversion', 'src_screen', 'src_st',
                 'src_timestamp', 'src_userid', 'src_x_frame_options', 'src_kbn_version',
                 'src_access_control_expose_headers', 'src_from', 'src_access_control_max_age', 'src_allow', 'src_etag',
                 'src_x_csrftoken', 'src_tlogtraceid', 'src_preip', 'src_preivkapp', 'src_preivkhost', 'src_tlogspanid',
                 'src_vivo_browser_text_zoom', 'src_contents_client_adapter_id', 'src_x_playback_session_id',
                 'src_imgids', 'src_am_traceid', 'src_xms_backend_server', 'src_sssessionid', 'src_client_requesttoken',
                 'src_client_requestts', 'src_strict_transport_security', 'src_bdpparallelload', 'src_referrer_policy',
                 'src_x_amz_request_id', 'src_x_miorigin', 'src_bussno', 'src_comcode', 'src_createuser',
                 'src_imgfilepath', 'src_imgid', 'src_x_powered_by', 'src_x_csrf_token',
                 'src_x_envoy_upstream_service_time', 'src_busscode', 'src_prod_sw8', 'src_age', 'src_client_ip',
                 'src_content_transfer_encoding', 'src_remote_addr', 'src_traceid', 'src_cf_ray',
                 'src_content_disposition', 'src_content_range', 'src_content_security_policy', 'src_jenkins_crumb',
                 'src_p3p', 'src_tc_anp', 'src_tc_entsig', 'src_tc_spanid', 'src_tc_traceid', 'src_www_authenticate',
                 'src_x_cache_status', 'src_x_cid', 'src_x_clickhouse_query_id', 'dst_content_type',
                 'dst_content_length', 'dst_transfer_encoding', 'dst_connection', 'dst_set_cookie',
                 'dst_x_oracle_dms_ecid', 'dst_content_language', 'dst_cache_control', 'dst_server', 'dst_expires',
                 'dst_pragma', 'dst_access_control_allow_origin', 'dst_keep_alive', 'dst_vary',
                 'dst_access_control_allow_methods', 'dst_access_control_allow_headers', 'dst_etag',
                 'dst_access_control_max_age', 'dst_content_encoding', 'dst_access_control_expose_headers',
                 'dst_x_xss_protection', 'dst_access_control_request_headers', 'dst_x_frame_options', 'dst_location',
                 'dst_x_powered_by', 'dst_p3p', 'dst_x_ua_compatible', 'dst_x_mule_encoding', 'dst_x_mule_session',
                 'dst_x_amz_request_id', 'dst_content_disposition', 'dst_content_range', 'dst_allow',
                 'dst_xms_backend_server', 'dst_x_clickhouse_query_id', 'dst_referrer_policy',
                 'dst_content_security_policy', 'dst_content_id', 'dst_sec_websocket_accept', 'dst_tlogtraceid',
                 'dst_sec_websocket_extensions', 'dst_via', 'dst_x_cache', 'dst_x_amz_cf_id', 'dst_x_amz_cf_pop',
                 'dst_rgwx_embedded_metadata_len', 'dst_rgwx_mtime', 'dst_rgwx_obj_pg_ver', 'dst_rgwx_object_size',
                 'dst_x_request_id', 'dst_x_envoy_upstream_service_time', 'dst_age', 'dst_www_authenticate',
                 'dst_strict_transport_security', 'dst_x_splunk_digest', 'dst_x_splunk_lm_nonce',
                 'dst_x_splunk_lm_timestamp', 'dst_content_transfer_encoding', 'dst_x_runtime', 'dst_x_arequestid',
                 'dst_x_ausername', 'dst_x_asessionid', 'dst_accept_charset', 'dst_n', 'dst_x_w_no',
                 'dst_kbn_license_sig', 'dst_kbn_name', 'dst_audit_id', 'dst_cf_ray', 'dst_x_cache_status',
                 'dst_ctl_cache_status', 'dst_request_id', 'dst_x_csrf_token', 'dst_hostname', 'dst_browseruid',
                 'dst_x_cid', 'dst_page_title', 'dst_x_ws_request_id', 'dst_gsid', 'dst_sc', 'dst_x_response_timestrap',
                 'dst_link', 'dst_x_reqid', 'dst_login', 'dst_x_instance_identity', 'dst_x_jenkins',
                 'dst_x_jenkins_session', 'packet_size_mean', 'packet_size_variance', 'packet_len_total_count',
                 'packet_len_total_average', 'packet_len_total_min', 'packet_len_total_max', 'packet_len_total_rate',
                 'packet_len_0_19_count', 'packet_len_0_19_average', 'packet_len_0_19_min', 'packet_len_0_19_max',
                 'packet_len_0_19_percent', 'packet_len_40_79_count', 'packet_len_40_79_average',
                 'packet_len_40_79_min', 'packet_len_40_79_max', 'packet_len_40_79_rate', 'packet_len_40_79_percent',
                 'packet_len_80_159_count', 'packet_len_80_159_average', 'packet_len_80_159_min',
                 'packet_len_80_159_max', 'packet_len_80_159_rate', 'packet_len_80_159_percent',
                 'packet_len_160_319_count', 'packet_len_160_319_average', 'packet_len_160_319_min',
                 'packet_len_160_319_max', 'packet_len_160_319_rate', 'packet_len_160_319_percent',
                 'packet_len_320_639_count', 'packet_len_320_639_average', 'packet_len_320_639_min',
                 'packet_len_320_639_max', 'packet_len_320_639_rate', 'packet_len_320_639_percent',
                 'packet_len_640_1279_count', 'packet_len_640_1279_average', 'packet_len_640_1279_min',
                 'packet_len_640_1279_max', 'packet_len_640_1279_rate', 'packet_len_640_1279_percent',
                 'packet_len_1280_2559_count', 'packet_len_1280_2559_average', 'packet_len_1280_2559_min',
                 'packet_len_1280_2559_max', 'packet_len_1280_2559_rate', 'packet_len_1280_2559_percent',
                 'packet_len_more_than_5120_count', 'packet_len_more_than_5120_average',
                 'packet_len_more_than_5120_min', 'packet_len_more_than_5120_max', 'packet_len_more_than_5120_percent',
                 'all_req_packet_size_mean', 'all_req_packet_size_var', 'all_res_packet_size_mean',
                 'all_res_packet_size_var', 'all_req_packet_time_period_mean', 'all_res_packet_time_period_mean',
                 'all_req_packet_time_period_var', 'all_res_packet_time_period_var', 'req_header_count_mean',
                 'dns_domain_length', 'dns_domain_length_mean', 'req_res_period_mean', 'req_res_period_var',
                 'status_code_1x_count', 'status_code_2x_count', 'status_code_3x_count', 'status_code_4x_count',
                 'status_code_5x_count', 'req_bytes_percentage', 'res_bytes_percentage',
                 'cookie_end_with_semicolon_count', 'ua_duplicate_count', 'isDangerous', 'dns.host_list']


def check_length(df, scene):
    """Return ``df`` projected onto the fixed column layout of ``scene``.

    The result has exactly the columns of ``normal_cols`` (scene
    ``"normal"``) or ``webshell_cols`` (scene ``"webshell"``), in that order.
    Columns absent from ``df`` are created and filled with ``""``.
    ``isDangerous`` is created as ``False`` when absent; otherwise its
    missing values are replaced with ``False``. For the ``"normal"`` scene
    ``origin_isDangerous`` is always set to the normalised ``isDangerous``
    values, overriding any existing column of that name.

    The input frame is not modified; a new frame is returned.

    Args:
        df: Input ``pandas.DataFrame``.
        scene: ``"normal"`` or ``"webshell"``.

    Returns:
        A new ``pandas.DataFrame`` with the scene's columns.

    Raises:
        ValueError: If ``scene`` is not a supported scene name (previously
            the function silently returned ``None`` in that case).
    """
    if scene == "normal":
        expected_cols = normal_cols
    elif scene == "webshell":
        expected_cols = webshell_cols
    else:
        raise ValueError(f"unsupported scene: {scene!r}")

    if "isDangerous" in df.columns:
        danger = df["isDangerous"].fillna(False)
    else:
        danger = False

    overrides = {"isDangerous": danger}
    if scene == "normal":
        overrides["origin_isDangerous"] = danger

    # A single reindex adds every missing column at once; inserting hundreds
    # of columns one by one fragments the frame and is much slower.
    return df.assign(**overrides).reindex(columns=expected_cols, fill_value="")


# Scene names accepted by merge_data(); check_length() has a branch for each.
scenes = ["normal", "webshell"]


def merge_data():
    """Interactively merge several CSV files into one de-duplicated CSV.

    Prompts on stdin for a comma-separated list of input CSV paths, the
    output file name and the scene (one of ``scenes``). Each input file is
    aligned to the scene's column layout with ``check_length`` and reduced to
    the rows whose ``protocol`` value contains ``"http"``. The pieces are
    concatenated, exact duplicate rows are dropped, and the result is written
    to the output path. A one-row record of the merge (time, output file,
    input files) is then written to ``log.csv`` in the current working
    directory, replacing any existing file of that name.

    Raises:
        SystemExit: With status 0 if the scene is not listed in ``scenes``;
            with an error message if no input path was entered.
    """
    paths = input("请输入第一个csv文件，用逗号分割:")
    out = input("请输入输出文件名:")
    scene = input("场景:").strip()
    if scene not in scenes:
        print(f"场景不是{','.join(scenes)}")
        # ``exit`` is injected by the ``site`` module and is not always
        # available; raising SystemExit is equivalent and always works.
        raise SystemExit(0)

    # Tolerate spaces around the commas and stray/trailing commas.
    path_list = [p.strip() for p in paths.split(",") if p.strip()]
    if not path_list:
        raise SystemExit("未输入任何csv文件")

    df_list = []
    total = len(path_list)
    for index, path in enumerate(path_list, start=1):
        print(f"正在读取读文件：{index}/{total}")
        df = pd.read_csv(path)
        df = check_length(df, scene)
        # Cast to str first so NaN or numeric cells cannot break the ``.str``
        # accessor or put NaN into the boolean mask.
        is_http = df["protocol"].astype(str).str.contains("http", regex=False)
        df_list.append(df[is_http])

    df = pd.concat(df_list, ignore_index=True).drop_duplicates()
    print(f"总长度：{df.shape}")

    # Write the merged data before the log so the log never records a merge
    # whose output could not be written.
    df.to_csv(out, index=False)

    # The record is wrapped in a list: a dict of bare scalars is rejected by
    # the DataFrame constructor because it has no index to build rows from.
    log = pd.DataFrame([{
        "合并时间": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        "合并后的文件": out,
        "用于合并的文件": paths,
    }])
    log.to_csv("log.csv", index=False)
    print("已写出log.csv，可更新文件信息")


# Run the interactive merge only when this file is executed as a script.
if __name__ == '__main__':
    merge_data()
